/*
Input: newdata/estimation_prep_ltw18 [prepared precinct-level panel]


Output: 
	- Table E.2, E.3, E.4
	- Figure 6
	
Tasks:
	- Summary Statistics Table
	- Balancing exercise: Correlation of reassignment timing and changes in 
		precinct characteristics, standardized and nonstandardized characteristics


*/	
	
* PULL: Stimmbezirk-level data
	use "$newdata/estimation_prep_ltw18.dta", clear



********************************************************************************
*	 Prep *
********************************************************************************

	* Declare the panel structure (panel variable: sb_new, time variable: wahl_id)
	xtset sb_new wahl_id

	* ---- Outcomes -------------------------------------------------------------
	label variable turnout_urne      "Polling Place Turnout"
	label variable turnout_pos_req   "Mail-in Turnout (Requested Polling Cards)"
	label variable turnout_tot_req   "Total Turnout"

	* ---- Variables of interest ------------------------------------------------
	label variable street_dist       "Avg. Walking Distance to the Polling Place (in km)"
	label variable treat_simple      "Share of Reassigned Residential Addresses"
	label variable treat_consol      "Share Reassigned (Precinct Reconfiguration)"
	label variable treat_no_consol   "Share Reassigned (Recruitment of Polling Location)"

	* ---- Precinct characteristics used in the summary table (shares) ----------
	label variable ew_ges            "# residents"
	label variable wb_anteil         "\% Residents Eligible to Vote"
	label variable ew_dtmihi         "\% Non-native German Residents"
	label variable ew_biodt          "\% Native German Residents"
	label variable ew_ausl_eu        "\% EU Foreigners"
	label variable ew_ausl_else      "\% Non-EU Foreigners"
	label variable ew_ledig          "\% Single Residents"
	label variable ew_married        "\% Married Residents"
	label variable wb_18t24          "\% Electorate Aged 18-24"
	label variable wb_25t34          "\% Electorate Aged 25-34"
	label variable wb_35t44          "\% Electorate Aged 35-44"
	label variable wb_45t59          "\% Electorate Aged 45-59"
	label variable wb_60plus         "\% Electorate Aged 60+"
	label variable wb_ausl           "\% EU Foreigners in the Electorate"

	* ---- Characteristics shared by the summary table and the balance table ----
	* (each label is assigned once; both tables use the same text)
	label variable hh_kids           "\% Households with Children"
	label variable avg_dur           "Avg. Duration of Residence"
	label variable mpreis_flats_rent "Avg. Quoted Rent per sqm"

	* ---- Precinct characteristics used in the balance table (counts) ----------
	label variable abs_ew_ges        "# Residents"
	label variable abs_ew_ledig      "# Single Residents"
	label variable abs_ew_married    "# Married Residents"
	label variable abs_ew_biodt      "# Native German Residents"
	label variable abs_ew_dtmihi     "# Non-native German Residents"
	label variable abs_ew_ausl       "# Foreign Residents"
	label variable abs_wb_anteil     "# Eligible Voters"
	label variable abs_wb_18t24      "# Eligible Voters Aged 18-24"
	label variable abs_wb_25t34      "# Eligible Voters Aged 25-34"
	label variable abs_wb_35t44      "# Eligible Voters Aged 35-44"
	label variable abs_wb_45t59      "# Eligible Voters Aged 45-59"
	label variable abs_wb_60plus     "# Eligible Voters Aged 60+"
	label variable abs_wb_dt         "# German Eligible Voters"
	label variable abs_wb_ausl       "# EU Foreigners in the Electorate"

	* ---- Migration flows (balance table) --------------------------------------
	label variable withmig           "# Within Migration"
	label variable outmig            "# Outmigration"
	label variable inmig             "# Inmigration"
	
********************************************************************************
*	Summary Statistics *
********************************************************************************
* Summary Statistics: Precinct (converted SE-18)

	* Variable groups, in the row order they appear in the table
	local outcomes turnout_urne turnout_pos_req turnout_tot_req
	local varint   street_dist treat_simple treat_consol treat_no_consol
	local sum_ctr  ew_ges wb_anteil ew_dtmihi ew_biodt ew_ausl_eu ew_ausl_else ///
				   ew_ledig ew_married ///
				   wb_18t24 wb_25t34 wb_35t44 wb_45t59 wb_60plus wb_ausl ///
				   hh_kids avg_dur mpreis_flats_rent

	* The summary table reports raw (non-standardized) values, so the labels
	* of these two variables carry their units here
	label variable mpreis_flats_rent "Avg. Quoted Rent per sqm (in euros)"
	label variable avg_dur           "Avg. Duration of Residence (in years)"

	* TABLE E2. Summary Statistics of Precinct Characteristics
	* Post detailed summary statistics, then write them as a LaTeX fragment;
	* substitute() escapes the "#" that appears in several variable labels
	eststo clear
	estpost summarize `outcomes' `varint' `sum_ctr', detail
	esttab using "${tables}/Table_E2_sumstat.tex", ///
		replace label substitute(# \#) ///
		fragment nomtitle nonumber noobs ///
		cells("mean(label(Mean) fmt(2)) sd(label(Std. Dev.) fmt(2)) min(label(Min) fmt(2)) p25(fmt(2)) p50(label(Median) fmt(2)) p75(fmt(2)) max(label(Max) fmt(2))")

	
********************************************************************************
* Is reassignment timing corr w/ changes in precinct characteristics? *
********************************************************************************

* Univariate regressions of treatment on precinct characteristics (+ FEs),
* followed by a check for joint significance.
*	Approach: the regressions are "stacked" on top of each other, i.e. the data
*	are duplicated once per covariate and the fixed effects are interacted with
*	the duplicate id.
*	The INDEPENDENT variables are standardized (mean zero, unit SD) for better
*	exposition.
*	The resulting OLS coefficients are EQUIVALENT to those of separate
*	univariate regressions, but the stacked form allows a direct F-test of
*	joint significance.


	* Covariates on which balance is checked (order fixes the row order later on)
	global bal_test abs_ew_ges abs_ew_ledig abs_ew_married abs_ew_biodt abs_ew_dtmihi abs_ew_ausl ///
					abs_wb_anteil abs_wb_18t24 abs_wb_25t34 abs_wb_35t44 abs_wb_45t59 abs_wb_60plus ///
					abs_wb_dt abs_wb_ausl ///
					withmig outmig inmig ///
					hh_kids mpreis_flats_rent avg_dur

	* Reset the two labels to their unit-free text before standardizing; the
	* z-score variables inherit these labels for the figure output
	label variable mpreis_flats_rent "Avg. Quoted Rent per sqm"
	label variable avg_dur           "Avg. Duration of Residence"

	* Build z-scores zs_<var> for every balance covariate
	capture drop zs_*
	tempname mu sigma
	foreach v of varlist $bal_test {
		quietly summarize `v'
		scalar `mu'    = r(mean)
		scalar `sigma' = r(sd)
		generate zs_`v' = (`v' - `mu') / `sigma'

		* Carry the label over, stripping any "(thsd)" marker and backslashes
		local raw_lab : variable label `v'
		local clean_lab = ustrregexra("`raw_lab'","\(thsd\)","")
		local clean_lab = ustrregexra("`clean_lab'","\\","")
		label variable zs_`v' "`clean_lab'"
	}
						
	
* Run the stacked balance regressions on a copy of the data held in a separate
* frame (ftest), so that the expansion below leaves the default frame untouched.
* The copy is NOT wrapped in -capture-: if it fails, the script must stop here
* instead of continuing on a stale or missing frame.
frame copy 	default ftest, replace
frame  	ftest {
	
	** i) Prep
		// label outcomes for tables
		lab var parttreat_simple 	"\makecell{Indicator \\ (Reassigned >0)}"
		lab var fulltreat_simple 	"\makecell{Indicator \\ (Reassigned =100)}"		
		lab var treat_simple 		"\makecell{Share \\ Reassigned}"
		lab var treat_no_consol 	"\makecell{Share Reassigned \\ (Recruitment)}"		
		lab var treat_consol 		"\makecell{Share Reassigned \\ (Precinct Reconfig.)}"
		lab var ln_street_dist 		"\makecell{Log \\ Avg. Walking Distance}"
		
		// prep data: duplicate dataset + gen 'dup': dataset_id for FE later
		gen id = _n
		local num : list sizeof global(bal_test)		// extract number of Covariates
		expand `num'									// expand dataset by number of Covariates
		bys id: gen dup = _n							// gen dup: id for each duplicate dataset
		tab dup, gen(d)									// gen dummies for number of identical "datasets"
		
		// gen interaction terms covariate x dataset_id, one copy of the data per
		// covariate. Each covariate is paired BY NAME with its z-score, and the
		// created variables are collected in explicit lists, so that neither the
		// pairing nor the regressions below depend on variable order or on
		// wildcards that could pick up unrelated variables.
		//   szint_zs_<var> : STANDARDIZED covariate x dataset dummy
		//   int_<var>      : NON-standardized covariate x dataset dummy
		// clonevar is used so that each interaction keeps the variable label of
		// its source variable.
		local j = 0
		local szint_vars ""
		local int_vars ""
		foreach v of varlist $bal_test {
			local j = `j' + 1
			
			clonevar 	szint_zs_`v' = zs_`v'
			replace 	szint_zs_`v' = d`j' * zs_`v'
			local szint_vars `szint_vars' szint_zs_`v'
			
			clonevar 	int_`v' = `v'
			replace 	int_`v' = d`j' * `v'
			local int_vars `int_vars' int_`v'
		}
		
		*change labels for non-standardized case for figure
			lab var int_mpreis_flats_rent "Avg. Quoted Rent per sqm (euros)"
			lab var int_avg_dur			"Avg. Duration of Residence (years)"
	  
	** iia) Run regression with interactions using STDZD covariates
	* Note: OLS coefs equivalent to separate univariate regs
	* 6 Outcomes: Dummy(full precinct reass), Dummy(at least part reass), %addresses reass, % addresses reassigned by reason (2 outcomes), log walking dist
	* NOTE(review): $wgt and $opt3 are globals that are not set in this file; presumably they are defined by a master do-file -- confirm
		outreg, clear
		estimates clear
		foreach depvar of varlist fulltreat_simple parttreat_simple treat_simple  treat_consol treat_no_consol ln_street_dist {
			 reghdfe  `depvar' `szint_vars' $wgt, absorb(i.dup#i.wahl_id i.dup#i.sb_new)  vce(cluster sb_new)
			
			// save coefs (used by the coefplots in step iv)
			estimates store `depvar'_zs
			
			// run F-test: b1 = b2 = ... = 0
			local test ""
			foreach v of local szint_vars {	
				local test "`test' _b[`v'] = "
			}
			qui test "`test'" 0
			
			// create table; e(N) counts the stacked sample, so divide by the
			// number of stacked copies to report the original number of obs
			local obs = `e(N)' / `num' 
			qui outreg, $opt3 noautosumm addrow(Observations, `obs' \ "\$ F\$-test on joint insignificance [$ Pr>F$]", "`:di %6.2f `r(F)'' [`:di %6.2f `r(p)'']" \ ///
							Precinct FE, X \ Election FE, X) 
		}
		
	** iib) Export Table for all outcomes
	* TABLE E3. Reassignment Timing and Changes in Precinct Characteristics
		outreg using "$tables/Table_E3_uncond_balance_test_zscores", replay  tex replace fragment
		cleantex 	 "$tables/Table_E3_uncond_balance_test_zscores.tex", nodis replace
				
	** iiia) Run same regression w/ NON-STDZ covariates
		outreg, clear
		foreach depvar of varlist fulltreat_simple parttreat_simple treat_simple  treat_consol treat_no_consol ln_street_dist {
			 reghdfe  `depvar' `int_vars' $wgt, absorb(i.dup#i.wahl_id i.dup#i.sb_new)  vce(cluster sb_new)
			
			
			// run F-test: b1 = b2 = ... = 0
			local test ""
			foreach v of local int_vars {	
				local test "`test' _b[`v'] = "
			}
			qui test "`test'" 0
			
			// create table (observations rescaled as in step iia)
			local obs = `e(N)' / `num' 
			qui outreg, $opt3 noautosumm addrow(Observations, `obs' \ "\$ F\$-test on joint insignificance [$ Pr>F$]", "`:di %6.2f `r(F)'' [`:di %6.2f `r(p)'']" \ ///
							Precinct FE, X \ Election FE, X) 
		}
		
	** iiib) Export Table for all outcomes
	* TABLE E4. Reassignment Timing and Changes in Precinct Characteristics (Non-standardized)
		outreg using "$tables/Table_E4_uncond_balance_test_nosz", replay tex replace fragment
		cleantex 	 "$tables/Table_E4_uncond_balance_test_nosz.tex", nodis replace
	
	
    ** iv) Visualisation with COEFPLOTS (stdz coefs)
	* Uses the estimates stored in step iia (suffix _zs); two panels per graph,
	* then both graphs are stacked into one figure
	
	* PLOT: FIGURE 6. Reassignment Timing and Changes in Precinct Characteristics
		  coefplot  (fulltreat_simple_zs, mcol(black) ms(O) msize(vsmall) ciopt(color(black) recast(rcap)) levels(95) mlw(.3) legend(off)), ///
				bylabel("{bf:Panel A.}" "Reassigned")  ///
			|| (ln_street_dist_zs, mcol(black) ms(O) msize(vsmall) ciopt(color(black) recast(rcap)) levels(95) mlw(.3) legend(off)), ///
				bylabel("{bf:Panel B.}" "Log Walking Distance") ///
			|| ,  drop(_cons) xline(0, lpattern(solid) lcol(black)) aspect(1) coeflab(,labsize(medsmall)) ///
			xlab(-.05(.05).1,labsize(medsmall) nogrid) xtick(, grid ) ytick(,grid glsty(solid) glcol(black%20))  ///
			subtitle(,nobox bexpand justification(left) size(medsmall)) name(g1, replace)
			
	  coefplot  (treat_consol_zs, mcol(black) ms(O) msize(vsmall) ciopt(color(black) recast(rcap)) levels(95) mlw(.3) legend(off)) , ///
		bylabel("{bf:Panel C.}" "Share Reassigned (Precinct Reconfiguration)")  ///
			|| (treat_no_consol_zs, mcol(black) ms(O) msize(vsmall) ciopt(color(black) recast(rcap)) levels(95) mlw(.3) legend(off)), ///
				bylabel("{bf:Panel D.}" "Share Reassigned (Recruitment)")  ///
			|| ,  drop(_cons) xline(0, lpattern(solid) lcol(black)) aspect(1) coeflab(,labsize(medsmall)) ///
			xlab(-.05(.05).1,labsize(medsmall) nogrid) xtick(, grid ) ytick(,grid glsty(solid) glcol(black%20))  ///
			subtitle(,nobox bexpand justification(left) size(medsmall))  name(g2, replace)
			
			graph combine g1 g2, row(2) imargin(zero) ycommon iscale(.6)
	
			graph export "$figures/Figure_6_COEFPLOT_balancetest_z_FEs.pdf", replace				
	  
}
